# See: https://github.com/openlink/virtuoso-opensource/
# assumes install into ~
# assumes user aidan
# assumes data in ~/wikidata/wikidata.nt.bz2

# Compile Virtuoso: install the build prerequisites first
sudo apt-get update
sudo apt-get install \
  autoconf automake libtool \
  flex bison gperf gawk m4 make \
  openssl libssl-dev

# Fetch only the v7.2.5.1 tag (the same version as in the paper)
git clone --branch v7.2.5.1 --single-branch \
  https://github.com/openlink/virtuoso-opensource.git
cd virtuoso-opensource/

## SKIP
# increase the hard-coded result size limit
# we do not run this, as the box size imposes an upper limit anyway
# https://github.com/openlink/virtuoso-opensource/issues/700
# sed -i 's/maxrows := 1024\*1024/maxrows := 16\*1024\*1024/g' libsrc/Wi/sparql_io.sql
## SKIP

# Generate the build system, configure for an install under ~/virtuoso,
# then compile and install.
# The prefix is written with $HOME rather than ~ because the shell does not
# expand a tilde that follows "--prefix=", so configure would otherwise
# receive the literal string "~/virtuoso/".
# The steps are chained with && so that a failing step stops the sequence
# instead of installing a broken or stale build.
./autogen.sh &&
  ./configure --prefix="$HOME/virtuoso/" &&
  make &&
  make install

emacs ~/.profile   # or ~/.bash_profile
# add to end (use $HOME, not ~: a tilde inside double quotes is stored
# literally in PATH, and shells other than bash may not resolve it):
# PATH="$PATH:$HOME/virtuoso/bin/"

source ~/.profile  # or ~/.bash_profile
printf '%s\n' "$PATH"
# make sure the virtuoso bin path is there

# Check the available memory in MB; the buffer settings below scale with it
free -m
emacs "$HOME/virtuoso/var/lib/virtuoso/db/virtuoso.ini"
# In virtuoso.ini:
#   NumberOfBuffers            -> MB_RAM*80 (our experiments: 90000 * 80 = 7200000)
#   MaxDirtyBuffers            -> MB_RAM*60 (our experiments: 90000 * 60 = 5400000)
#   MaxCheckpointRemap         -> MB_RAM*20, in the [Database] section
#                                 (our experiments: 90000 * 20 = 1800000)
#   DirsAllowed                -> add the data dir ~/wikidata/ to the list
#   ResultSetMaxRows           -> revise; our experiments set this to 100000000,
#                                 in any case the code hard-limits to 1024*1024
#   MaxQueryCostEstimationTime -> revise; our experiments commented this line out
#                                 with a leading ';', which removes the limit
#   MaxQueryExecutionTime      -> revise; our experiments used 600 for 10 minute timeouts


# Launch the server from its database directory; the log file is written there
cd "$HOME/virtuoso/var/lib/virtuoso/db/"
virtuoso-t &   # startup can take a minute or two
# Follow the log until "Server online at 1111" shows up, then press ctrl+c
tail -f virtuoso.log

# Virtuoso has a problem with geo-datatypes: rewriting the "#wktLiteral"
# datatype suffix to "#wktliteral" stops those literals from being parsed.
# This pass takes a while, so it may be worth running it inside screen.
#
# For .gz input data, use zcat in place of bzcat.
bzcat ~/wikidata/wikidata.nt.bz2 \
  | sed -e 's/#wktLiteral/#wktliteral/g' \
  | gzip -c > ~/wikidata/wikidata-c.nt.gz

# Fetch the bulk loader scripts into the home directory
cd "$HOME"
git clone https://github.com/amoya87/wikidata-virtuoso.git

# Edit the load script so that the first argument of ld_add points to
# ~/wikidata/wikidata-c.nt.gz
emacs "$HOME/wikidata-virtuoso/scripts/load/load_data.sql"

# Run the bulk load from inside the script folder
#  (you may want to do this inside screen)
cd "$HOME/wikidata-virtuoso/scripts/load/"
sh bulk_load.sh

# If the loader reports that isql-v is not found:
#   emacs ~/wikidata-virtuoso/scripts/load/virtuoso-run-script.sh
#   and change isql-v to isql

# Restart Virtuoso from its database directory
# NOTE(review): these steps only start the server; presumably it is no longer
# running at this point — confirm, and shut it down first if it still is.
cd "$HOME/virtuoso/var/lib/virtuoso/db/"
virtuoso-t &   # startup can take a minute or two
# Follow the log until "Server online at 1111" shows up, then press ctrl+c
tail -f virtuoso.log

# Remove the default data by clearing the graphs listed below, then checkpoint
#  (this data may reappear on restart of Virtuoso!)
# The statements are fed to isql through a quoted here-document so that the
# shell passes them through verbatim instead of trying to interpret the
# <...> IRIs as redirections.
isql <<'EOF'
SPARQL CLEAR GRAPH <http://www.openlinksw.com/schemas/virtrdf#>;
SPARQL CLEAR GRAPH <http://www.w3.org/2002/07/owl#>;
SPARQL CLEAR GRAPH <http://localhost:8890/sparql>;
SPARQL CLEAR GRAPH <http://localhost:8890/DAV/>;
SPARQL CLEAR GRAPH <http://www.w3.org/ns/ldp#>;
CHECKPOINT;
EOF

